/*************************************************************************
 * The contents of this file are subject to the MYRICOM MYRINET          *
 * EXPRESS (MX) NETWORKING SOFTWARE AND DOCUMENTATION LICENSE (the       *
 * "License"); User may not use this file except in compliance with the  *
 * License.  The full text of the License can be found in LICENSE.TXT    *
 *                                                                       *
 * Software distributed under the License is distributed on an "AS IS"   *
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See  *
 * the License for the specific language governing rights and            *
 * limitations under the License.                                        *
 *                                                                       *
 * Copyright 2003 - 2004 by Myricom, Inc.  All rights reserved.          *
 *************************************************************************/

/* Source-control identification string: an "@(#)" marker followed by the
 * expanded $Id$ keyword naming this file and its revision. */
static const char __idstring[] = "@(#)$Id: rdma.c,v 1.14 2005/06/29 00:23:16 eugene Exp $";

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>

#include <netinet/in.h>
#include <unistd.h>

#include "mx_auto_config.h"
#include "myriexpress.h"
#include "internal.h"
#include "rdma.h"


void
mx_pt_add_entry(uint64_t virt, uint64_t phys, int flags){
  mx_pte_t *pte;
  int pgd_offset;
  int pte_offset;
  int pmd_offset;

  pgd_offset = MX_PGD_OFFSET(virt);
  if (mx_pt.pmd[pgd_offset] == NULL){
    mx_pt.pmd[pgd_offset] = (mx_pmd_t *)calloc(1, sizeof(mx_pmd_t));
  }

  pmd_offset = MX_PMD_OFFSET(virt);
  if ((mx_pt.pmd[pgd_offset])->pte[pmd_offset] == NULL){
    (mx_pt.pmd[pgd_offset])->pte[pmd_offset] =
      (mx_pte_t *)calloc(1, sizeof(mx_pte_t));
    (mx_pt.pmd[pgd_offset])->count ++;
  }
  pte = (mx_pt.pmd[pgd_offset])->pte[pmd_offset];

  pte_offset = MX_PTE_OFFSET(virt);
  if (pte->phys[pte_offset] == 0){
    pte->count++;
  } else {
    flags = flags | (pte->phys[pte_offset] & MX_PT_FLAG_MASK);
  }
  pte->phys[pte_offset] = (phys & MX_PAGE_MASK) | flags;
}
 

/*
 * Remove permission bits `flags` from the entry for `virt` in the global
 * table mx_pt.
 *
 * - No entry (any level missing, or the slot is zero): nothing happens.
 * - The slot's MX_PT_FLAG_MASK bits equal `flags` exactly: the slot is
 *   cleared; the leaf table, and then the middle-level table, are freed
 *   and unlinked once their populated-child count reaches zero.
 * - Otherwise, if the slot currently holds both MX_RDMA_READ and
 *   MX_RDMA_WRITE: only the bits in `flags` are cleared from the slot.
 * - Any other combination leaves the slot untouched.
 */
void
mx_pt_del_entry(uint64_t virt, int flags){
  mx_pte_t *leaf;
  int top;
  int mid;
  int slot;

  top = MX_PGD_OFFSET(virt);
  if (mx_pt.pmd[top] == NULL){
    return;
  }

  mid = MX_PMD_OFFSET(virt);
  leaf = mx_pt.pmd[top]->pte[mid];
  if (leaf == NULL){
    return;
  }

  slot = MX_PTE_OFFSET(virt);
  if (leaf->phys[slot] == 0){
    return;
  }

  if ((leaf->phys[slot] & MX_PT_FLAG_MASK) != flags){
    /* Not an exact match: only downgrade a read+write entry. */
    if ((leaf->phys[slot] & MX_PT_FLAG_MASK) ==
        (MX_RDMA_READ | MX_RDMA_WRITE)){
      leaf->phys[slot] &= ~flags;
    }
    return;
  }

  /* Exact match: drop the entry, then prune levels that became empty. */
  leaf->phys[slot] = 0;
  if (--leaf->count > 0){
    return;
  }

  free(leaf);
  mx_pt.pmd[top]->pte[mid] = NULL;
  if (--mx_pt.pmd[top]->count > 0){
    return;
  }

  free(mx_pt.pmd[top]);
  mx_pt.pmd[top] = NULL;
}

/*
 * Look up `virt` in the global table mx_pt and report which of the
 * requested `flags` bits its entry carries.
 *
 * Returns 0 when any level of the table is missing or the slot is zero;
 * otherwise returns the slot value AND-ed with `flags` (non-zero when at
 * least one requested bit is set).
 */
int
mx_pt_pte_present(uint64_t virt, int flags){
  mx_pte_t *leaf;
  int top;
  int mid;
  int slot;

  top = MX_PGD_OFFSET(virt);
  if (mx_pt.pmd[top] != NULL){
    mid = MX_PMD_OFFSET(virt);
    leaf = mx_pt.pmd[top]->pte[mid];
    if (leaf != NULL){
      slot = MX_PTE_OFFSET(virt);
      if (leaf->phys[slot] != 0){
        return (leaf->phys[slot]) & flags;
      }
    }
  }
  return 0;
}

/*
 * Compute the page-aligned addresses of the first and last pages touched
 * by the byte range [ptr, ptr + len).
 *
 * Returns 1 and fills *first / *last on success.  Returns 0, leaving the
 * outputs untouched, when the range is empty or wraps past the top of the
 * 64-bit address space.
 */
static int
mx_rdma_page_span(uint64_t ptr, uint32_t len, uint64_t *first, uint64_t *last)
{
  const uint64_t page_size = MX_PAGE_SIZE;
  uint64_t tail;

  if (len == 0){
    return 0;
  }
  tail = ptr + (uint64_t)len - 1;      /* address of the last byte */
  if (tail < ptr){
    return 0;
  }
  *first = ptr - (ptr % page_size);
  *last = tail - (tail % page_size);
  return 1;
}

/*
 * Return 1 if every page touched by [ptr, ptr + len) has an entry carrying
 * at least one of `flags`, 0 otherwise.  An empty range (len == 0) is
 * always allowed; a range that wraps the address space never is.
 *
 * Pages are walked from the page containing `ptr` to the page containing
 * the last byte, so an unaligned range that straddles a page boundary has
 * its final page checked as well.  The walk uses 64-bit addresses and an
 * explicit end test, so it terminates for every `len`.
 *
 * NOTE(review): assumes the MX_*_OFFSET macros ignore the in-page offset
 * bits, so a page-aligned address selects the same entry as any address
 * inside that page - confirm against the macro definitions.
 */
int
mx_rdma_allowed(uint64_t ptr, uint32_t len, int flags){
  uint64_t page;
  uint64_t last;

  if (len == 0){
    return 1;
  }
  if (!mx_rdma_page_span(ptr, len, &page, &last)){
    return 0;
  }
  for(;;){
    if (!mx_pt_pte_present(page, flags)){
      return 0;
    }
    if (page == last){
      break;
    }
    page += MX_PAGE_SIZE;
  }
  return 1;
}

/*
 * Grant `flags` on every page touched by [ptr, ptr + len) by adding an
 * entry for each page (the stored phys value is the constant 1, as
 * before).  Empty or wrapping ranges are ignored.
 */
void
mx_rdma_allow(uint64_t ptr, uint32_t len, int flags){
  uint64_t page;
  uint64_t last;

  if (!mx_rdma_page_span(ptr, len, &page, &last)){
    return;
  }
  for(;;){
    mx_pt_add_entry(page, 1, flags);
    if (page == last){
      break;
    }
    page += MX_PAGE_SIZE;
  }
}

/*
 * Revoke `flags` on every page touched by [ptr, ptr + len) via
 * mx_pt_del_entry().  Empty or wrapping ranges are ignored.
 */
void
mx_rdma_disable(uint64_t ptr, uint32_t len, uint32_t flags){
  uint64_t page;
  uint64_t last;

  if (!mx_rdma_page_span(ptr, len, &page, &last)){
    return;
  }
  for(;;){
    mx_pt_del_entry(page, flags);
    if (page == last){
      break;
    }
    page += MX_PAGE_SIZE;
  }
}

/*
 * Record an RDMA window (ptr, len, flags) in the first free slot of the
 * endpoint's handle table and return that slot's index.
 *
 * A slot is free when its stored ptr is 0.  If every one of the MX_N_HNDL
 * slots is in use, nothing is written, a message is printed to stderr and
 * MX_N_HNDL is returned; that value is not a valid index
 * (mx_del_window_handle() rejects it).
 */
uint32_t
mx_add_window_handle(mx_endpoint_t endpoint, 
                     uint64_t ptr, 
                     int len,
                     uint32_t flags){
  int i;
  for(i = 0; i < MX_N_HNDL; i++){
    if (endpoint->handles.ptr[i] == 0){
      endpoint->handles.ptr[i] = ptr;
      endpoint->handles.len[i] = len;
      endpoint->handles.flags[i] = flags;
      return i;
    }
  }
  /* Table full: storing at index MX_N_HNDL would write past the arrays. */
  fprintf(stderr, "mx_add_window_handle: no free window handle\n");
  return MX_N_HNDL;
}

/*
 * Release slot `index` of the endpoint's window-handle table by zeroing
 * its ptr, len and flags.  An index of MX_N_HNDL or above only produces a
 * debug message.
 */
void
mx_del_window_handle(mx_endpoint_t endpoint, uint32_t index){
  if (index < MX_N_HNDL){
    endpoint->handles.ptr[index] = 0;
    endpoint->handles.len[index] = 0;
    endpoint->handles.flags[index] = 0;
    return;
  }
  MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("invalid handle\n"));
}


/*
 * Handle an incoming RDMA put: consume `data_len` payload bytes from
 * ep->recv_sock and answer the sender with an MX_SR_TYPE_PUT_ACK message
 * carrying the sender's request handle and a status code.
 *
 * endpoint       - local endpoint whose window-handle table is consulted.
 * ep             - peer descriptor; supplies recv_sock and the reply address.
 * data_len       - number of payload bytes that follow on the socket.
 * rdma_handle    - window-handle index named by the sender.
 * rdma_offset    - byte offset into that window.
 * sender_request - sender's request value, echoed back in the ack.
 *
 * The payload is read straight into the window only when all of these hold:
 * the handle index is in range, the slot is in use (ptr != 0), the offset
 * lies inside the window, the rest of the window can hold data_len bytes,
 * and that remainder passes mx_rdma_allowed() for MX_RDMA_WRITE.
 * Otherwise the payload is read into a scratch buffer and discarded, and the
 * ack carries MX_STATUS_REJECTED.
 *
 * On a socket read error the receive socket is closed and no ack is sent.
 *
 * NOTE(review): unlike mx_process_get(), this function does not take
 * Mx_po_lock while reading the handle table - confirm whether the caller
 * holds it.
 */
void
mx_process_put(mx_endpoint_t endpoint,
               struct mx_address_desc *ep, 
               uint32_t data_len,
               uint32_t rdma_handle,
               uint32_t rdma_offset,
               mx_request_t sender_request)
{

  /* Kept static as in the original; presumably the ack segments must stay
   * valid after this function returns - TODO confirm. */
  static mx_status_code_t err = MX_STATUS_SUCCESS;
  static mx_request_t req;
  char drain_buf[256];

  uint64_t write_to = 0;
  int allow = 0;

  mx_segment_t seg[2];
  mx_request_t req_nouse;

  req = sender_request;
  /* err is static: without this reset, a single rejected put would make
   * every later put report MX_STATUS_REJECTED as well. */
  err = MX_STATUS_SUCCESS;

  /* rdma_handle and rdma_offset are passed in per request: validate them
   * before they are used to index the table or compute an address. */
  if ((rdma_handle < MX_N_HNDL) &&
      (endpoint->handles.ptr[rdma_handle] != 0)){
    uint32_t handle_len = (uint32_t)endpoint->handles.len[rdma_handle];
    if (rdma_offset <= handle_len){
      /* Cannot underflow now that rdma_offset <= handle_len. */
      uint32_t window_len = handle_len - rdma_offset;
      write_to = endpoint->handles.ptr[rdma_handle] + rdma_offset;
      allow = (window_len >= data_len) &&
        mx_rdma_allowed(write_to, window_len, MX_RDMA_WRITE);
    }
  }

  if (allow){
    int n;
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("Received put: len = %d\n", data_len));
    n = mx_sock_read(ep->recv_sock, MX_PTR(write_to), data_len);
    if (n < 0){
      mx_close_recv_socket(endpoint, ep);
      fprintf(stderr, "Error reading message (put)\n");
      return;
    }
  } else {
    /* Unsigned so that data_len values above INT_MAX are still drained. */
    uint32_t nleft = data_len;
    err = MX_STATUS_REJECTED;
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("Memory not registered\n"));
    while (nleft > 0){
      int chunk = (nleft < sizeof(drain_buf)) ? (int)nleft
                                              : (int)sizeof(drain_buf);
      int n = mx_sock_read(ep->recv_sock, drain_buf, chunk);
      /* A zero-byte read makes no progress; treat it like an error rather
       * than spinning forever. */
      if (n <= 0){
        mx_close_recv_socket(endpoint, ep);
        fprintf(stderr, "Error draining message (put)\n");
        return;
      }
      nleft -= (uint32_t)n;
    }
  }

  seg[0].segment_ptr = &req;
  seg[0].segment_length = sizeof(req);
  seg[1].segment_ptr = &err;
  seg[1].segment_length = sizeof(err);
  
  mx_isend_with_type(endpoint, seg, 2, ep->address, 0, NULL,
                     &req_nouse, MX_SR_TYPE_PUT_ACK, 0, 0, 0);
  return;
}


/*
 * Handle an incoming RDMA get: read a struct mx_get_request from
 * ep->recv_sock and reply with an MX_SR_TYPE_GET_DATA message.
 *
 * endpoint       - local endpoint whose window-handle table is consulted.
 * ep             - peer descriptor; supplies recv_sock and the reply address.
 * sender_request - sender's request value, returned as the reply's
 *                  match_info.
 *
 * The reply carries the requested bytes and MX_STATUS_SUCCESS only when all
 * of these hold: the handle index is in range, the slot is in use
 * (ptr != 0), offset + len fits inside the window's recorded length, and the
 * range passes mx_rdma_allowed() for MX_RDMA_READ.  Otherwise an empty
 * segment is sent with MX_STATUS_REJECTED.
 *
 * Mx_po_lock is held while the handle table and page table are consulted.
 * If the request cannot be read, the receive socket is closed and no reply
 * is sent.
 *
 * NOTE(review): a short (non-negative) read is not detected here; the
 * request structure is zero-filled first so the fields are at least
 * deterministic - confirm whether mx_sock_read() can return short.
 */
void
mx_process_get(mx_endpoint_t endpoint,
               struct mx_address_desc *ep,
               mx_request_t sender_request){
  struct mx_get_request greq;
  int rc;
  uint64_t match_info;
  mx_segment_t seg;
  uint32_t handle;
  uint32_t send_len;
  uint64_t send_from;
  uint32_t status;
  mx_request_t data_req;
  mx_return_t ret;
  int allow;

  memset(&greq, 0, sizeof(greq));
  rc = mx_sock_read(ep->recv_sock, &greq, sizeof(greq));
  if (rc < 0){
    /* Same recovery as the put path: never act on an unread request. */
    mx_close_recv_socket(endpoint, ep);
    fprintf(stderr, "Error reading message (get)\n");
    return;
  }
  handle = greq.handle;
  send_len = greq.len;

  pthread_mutex_lock(&Mx_po_lock);
  send_from = 0;
  allow = 0;
  /* handle, offset and len arrive over the socket: validate them before
   * indexing the table or forming an address. */
  if ((handle < MX_N_HNDL) && (endpoint->handles.ptr[handle] != 0)){
    uint64_t window_len = (uint32_t)endpoint->handles.len[handle];
    uint64_t offset = greq.offset;
    if ((offset <= window_len) && (send_len <= window_len - offset)){
      send_from = endpoint->handles.ptr[handle] + offset;
      allow = mx_rdma_allowed(send_from, send_len, MX_RDMA_READ);
    }
  }

  if (!allow){
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("handle not found or memory not registered\n"));
    status = MX_STATUS_REJECTED;
    seg.segment_ptr = NULL;
    seg.segment_length = 0;
  } else {
    status = MX_STATUS_SUCCESS;
    seg.segment_ptr = MX_PTR(send_from);
    seg.segment_length = send_len;
  }
  match_info = (uint64_t)(uintptr_t)sender_request;
  pthread_mutex_unlock(&Mx_po_lock);

  /* TODO: isn't u.unmaskable, u.maskable uninitialized? */
  ret = mx_isend_with_type(endpoint, &seg, 1, ep->address,
                           match_info, NULL, &data_req,
			   MX_SR_TYPE_GET_DATA,
                           0, 0, status);
  if (ret != MX_SUCCESS){
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("get data: post fail\n"));
  }
}
